home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Format CD 42
/
Amiga Format AFCD42 (Issue 126, Aug 1999).iso
/
-serious-
/
programming
/
arexx
/
import_html
/
import_html.rexx
< prev
next >
Wrap
OS/2 REXX Batch file
|
1999-05-25
|
11KB
|
253 lines
/* Import_HTML
$VER: Import_HTML 3.0 by MJ, Adrian Barnett, Heiko Kuschel
Imports an HTML (Web) file into Wordworth.
All HTML codes are stripped out, leaving
the text. Also, codes for different headings,
and bold, italic and underline styles are
recognised.
Digita ARexx Script for Wordworth 5
Copyright ©1996, Digita International Ltd.
Created: 2 July 1996
Author: MJ
Version 2.0
Modified: 7 May 1997
Author: Adrian Barnett (adrian@abarnett.demon.co.uk)
Changes: Handle long lines of text
Handle lists
Deal with newlines correctly
Center text correctly
Deal with lower-case html tags
This still needs a lot of work...
... ok, so I did some of it...
Version 3.0
Modified: 11 Nov 1998
Uploaded: June 1999 (sorry for the big delay)
Author: Heiko Kuschel <hkuschel@mayn.de>
Changes: Handle German Umlauts
Major speed improvement
Font names and sizes configurable
to do:
list font
rework the title handling
tables using TurboCalc
tables using Tabs (configurable)
More speed improvements possible!
IIRC German umlauts are always printed in UPPERCASE in this version.
I had already changed this, but lost the code.
*/
/* -------------- Configure here --------------- */

/* Font name selected for the new document before any text is inserted. */
StandardFont = "Garamond Antiqua"

/* Font sizes in pt. H0 is the standard (body) font size. */
H0 = 12

/* H1..H6 are the sizes switched to when the matching <H1>..<H6> tag is read. */
H1 = 25
H2 = 20
H3 = 18
H4 = 15
H5 = 14
H6 = 13

/* ----- Nothing to configure after this line. ----- */
/* Request result strings from host commands so RESULT is filled in. */
OPTIONS RESULTS

/* numch is zeroed here and again once the file has been opened; its use */
/* is not visible in this section - TODO confirm what it counts.         */
numch = 0

/* Ask the host for a file requester limited to .HTML / .HTM files. */
RequestFile TITLE "Select HTML file..." PATTERN "(#?.HTML|#?.HTM)"

/* A return code above zero ends the script (presumably the requester */
/* was cancelled or failed - verify against the host's documentation). */
IF RC > 0 THEN EXIT

/* The requester's result string is the file to import. */
FileName = Result
If Open('MyFile', FileName, R) THEN DO
New
Address Value Result
Document A4 "0.5in" "0.5in" "0.6in" "1.0in"
Zoom 100
Paragraph 0 0 0 LEFT AUTO SINGLE NONE NONE
Font NAME StandardFont SIZE H0 PLAIN
Para = ''
numch = 0
DO Until EOF('MyFile')
MyChar = ReadCh('MyFile')
/* Handle "<>" tags */
select
when MyChar = '<' THEN DO
Text Para
Para = ''
Code = MyChar
DO UNTIL MyChar = '>'
MyChar = ReadCh('MyFile')
Code = Code || MyChar
END
/* Convert tag to upper case */
Code = UPPER(Code)
SubCode = SubStr(Code, 1, 3)
SubCode2 = SubStr(Code, 1, 4)
SubCode3 = SubStr(Code, 1, 2)
select
when Code = '<P>' THEN do
NewParagraph
NewParagraph
end
when Code = '<BR>' THEN
NewParagraph
when Code = '<B>' THEN
Bold
when Code = '<I>' | Code = '<ADDRESS>' THEN
Italic
when Code = '<U>' THEN
Underline
when Code = '</B>' | Code = '</I>' | Code = '</U>' | Code = '</ADDRESS>' THEN
Plain
when SubCode3 = '<H' then DO
Newparagraph
select
when Code = '<H1>' THEN
Font SIZE H1
when Code = '<H2>' THEN
Font SIZE H2
when Code = '<H3>' THEN
Font SIZE H3
when Code = '<H4>' THEN
Font SIZE H4
when Code = '<H5>' THEN
Font SIZE H5
when Code = '<H6>' THEN
Font SIZE H6
when Code = '<HR>' THEN do
text "___________________________________________________________"
NewParagraph
end
otherwise NOP
end
end
when Code = '<LI>' THEN DO
NewParagraph
text "o "
END
when Code = '</UL>' | Code = '</OL>' | Code = '</DIR>' | Code = '</MENU>' THEN
NewParagraph
when Subcode = '</H' THEN DO
NewParagraph
Font SIZE 12
END
when SubCode2 = '<IMG' THEN DO
text " [image] "
END
when Code = '<CENTER>' THEN
CentreJustify
when Code = '</CENTER>' THEN do
NewParagraph
LeftJustify
end
when Code = '<TITLE>' THEN DO
Title = ''
DO UNTIL MyChar = '<'
MyChar = ReadCh('MyFile')
IF MyChar = '&' THEN DO
/* Handle character entities such as &quot; */
Code = MyChar
DO UNTIL MyChar = ';'
MyChar = ReadCh('MyFile')
Code = Code || MyChar
END
/* Convert tag to upper case */
Code = UPPER(Code)
IF Code = '"' THEN TITLE = TITLE || """"
else IF Code = '>' THEN TITLE = TITLE || ">"
else IF Code = '<' THEN TITLE = TITLE || "<"
else IF Code = '&' THEN TITLE = TITLE || "&"
else IF Code = '&POUND;' THEN TITLE = TITLE || "£"
else IF Code = '&AUML;' THEN TITLE = TITLE || "ä"
else IF Code = '&OUML;' THEN TITLE = TITLE || "ö"
else IF Code = '&UUML;' THEN TITLE = TITLE || "ü"
else IF Code = '&S